  
# Arguments for the generation fine-tuning task, in two profiles: `default`
# and `slow`. The profiles are identical except for model_name_or_path and
# `device: cpu`, which only `default` sets.
finetune_generation:
  default:
    model_name_or_path: __internal_testing__/opt
    num_train_epochs: 3
    # NOTE(review): written without a decimal point; YAML 1.1 loaders such as
    # PyYAML may read this as the string "3e-5" rather than a float. Confirm
    # the consumer accepts either form before rewriting it.
    learning_rate: 3e-5
    warmup_ratio: 0.06
    weight_decay: 0.1
    label_smoothing: 0.1
    # max_steps (9) is below save_steps and logging_steps (10) and far below
    # eval_steps (10000) -- presumably so the run ends before any of those
    # intervals is reached; TODO confirm against the trainer.
    save_steps: 10
    max_steps: 9
    logging_steps: 10
    tensor_parallel_degree: 1
    eval_steps: 10000
    # NOTE(review): the directory name says opt-1.3b although this profile
    # loads __internal_testing__/opt; left unchanged because it is a path.
    output_dir: ./checkpoints/opt-1.3b
    src_length: 608
    # Only the default profile pins the device.
    device: cpu
    tgt_length: 160
    min_tgt_length: 1
    length_penalty: 0.7
    no_repeat_ngram_size: 3
    num_beams: 5
    select_topk: true
    per_device_eval_batch_size: 2
    per_device_train_batch_size: 2
    max_grad_norm: 1.0
    lr_scheduler_type: linear
    overwrite_output_dir: true
    fp16_opt_level: O1
    fp16: true
    recompute: true
    do_train: true
    do_eval: false
  slow:
    model_name_or_path: facebook/opt-125m
    num_train_epochs: 3
    # NOTE(review): same scalar-form caveat as learning_rate in `default`
    # (may load as a string under YAML 1.1 loaders) -- confirm.
    learning_rate: 3e-5
    warmup_ratio: 0.06
    weight_decay: 0.1
    label_smoothing: 0.1
    # Same step settings as `default`: max_steps (9) is below every interval.
    save_steps: 10
    max_steps: 9
    logging_steps: 10
    tensor_parallel_degree: 1
    eval_steps: 10000
    # NOTE(review): the directory name says opt-1.3b although this profile
    # loads facebook/opt-125m; left unchanged because it is a path.
    output_dir: ./checkpoints/opt-1.3b
    src_length: 608
    tgt_length: 160
    min_tgt_length: 1
    length_penalty: 0.7
    no_repeat_ngram_size: 3
    num_beams: 5
    select_topk: true
    per_device_eval_batch_size: 2
    per_device_train_batch_size: 2
    max_grad_norm: 1.0
    lr_scheduler_type: linear
    overwrite_output_dir: true
    fp16_opt_level: O1
    fp16: true
    recompute: true
    do_train: true
    do_eval: false

# Arguments for the generation export task. Each profile sets only the model
# to load.
export_generation:
  default:
    model_name_or_path: __internal_testing__/opt
  slow:
    model_name_or_path: facebook/opt-125m

# Arguments for the generation prediction task. Each profile sets only the
# model to load.
predict_generation:
  # Profile named `default`.
  default:
    model_name_or_path: __internal_testing__/opt
  # Profile named `slow`.
  slow:
    model_name_or_path: facebook/opt-125m